import os
from typing import Dict

from llama_index.core import (
    Document,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    get_response_synthesizer,
    load_index_from_storage,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openai import OpenAI as DeepSeeK
from llama_index.llms.openai.utils import ALL_AVAILABLE_MODELS, CHAT_MODELS


# Context-window sizes (in tokens) for the DeepSeek models we want to use,
# keyed by model name.
DEEPSEEK_MODELS: Dict[str, int] = {"deepseek-chat": 128000}

# Register the DeepSeek models with llama-index's OpenAI wrapper so that its
# model-name validation and chat-mode detection accept "deepseek-chat".
for _registry in (ALL_AVAILABLE_MODELS, CHAT_MODELS):
    _registry.update(DEEPSEEK_MODELS)

# --- Global LLM / embedding configuration --------------------------------
# SECURITY: an API key was previously hard-coded on this line and is therefore
# compromised — rotate it. The key is now read from the environment instead of
# being committed to source:  export DEEPSEEK_API_KEY=sk-...
llm = DeepSeeK(
    api_key=os.environ.get("DEEPSEEK_API_KEY"),
    model="deepseek-chat",
    # DeepSeek exposes an OpenAI-compatible endpoint, which is why the
    # OpenAI wrapper class works here.
    api_base="https://api.deepseek.com/v1",
    temperature=0.7,
)

# Make DeepSeek the default LLM for all llama-index components.
Settings.llm = llm

# Local BGE embedding model (small, Chinese variant) used for indexing and
# retrieval.
# NOTE(review): the path is machine-specific — consider moving it to an
# environment variable or config file as well.
embed_model = HuggingFaceEmbedding(model_name="/home/tom/llms/bge-small-zh-v1.5")
Settings.embed_model = embed_model

# Load the sample PDF into llama-index Document objects.
pdf_path = "/home/tom/my_learn/my_danwen/0_playgrounds/0_test_data/SQL.pdf"
reader = SimpleDirectoryReader(input_files=[pdf_path])
documents = reader.load_data()

# Quick sanity check on what was loaded.
print(len(documents))
print(documents)